package com.qlogic.commons.platform.impls.lucene.handler;

import java.io.InputStream;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.textmining.text.extraction.WordExtractor;

/**
 * {@link ContentHandler} implementation for Microsoft Word documents.
 * Text is extracted with the textmining {@link WordExtractor} and can be
 * returned either as a plain string or wrapped in a Lucene {@link Document}.
 */
public class POIWordDocHandler implements ContentHandler {

	/**
	 * Extracts the text of a Word document.
	 *
	 * @param is       stream positioned at the start of the Word document;
	 *                 this method does not close it
	 * @param encoding not used by this handler
	 * @return the extracted text, or <code>null</code> when the extractor
	 *         yields no text or only whitespace
	 * @throws ContentHandlerException if the extractor fails for any reason;
	 *                                 the original exception is kept as the cause
	 */
	public String getText (InputStream is, String encoding) throws ContentHandlerException {
		String bodyText = null;
		try {
			WordExtractor extractor = new WordExtractor ();
			bodyText = extractor.extractText (is);
		} catch (Exception e) {
			throw new ContentHandlerException(
					"Cannot extract text from a Word document", e);
		}
		// Treat missing or whitespace-only text as "no content". The null test
		// must come first so trim() is never invoked on a null reference.
		if (bodyText == null || bodyText.trim().length() == 0) {
			return null;
		}
		return bodyText;
	}

	/**
	 * Builds a Lucene document from the text of a Word document.
	 *
	 * @param is       stream positioned at the start of the Word document;
	 *                 this method does not close it
	 * @param encoding passed through to {@link #getText(InputStream, String)}
	 * @return a document with a single tokenized, non-stored field named
	 *         <code>BODY</code> holding the extracted text, or
	 *         <code>null</code> when no text could be extracted
	 * @throws ContentHandlerException if the text extraction fails
	 */
	public Document getDocument(InputStream is, String encoding) throws ContentHandlerException {
		String bodyText = getText(is, encoding);
		if (bodyText != null) {
			Document doc = new Document();
			// The body is indexed for searching but not stored in the index.
			doc.add(new Field (BODY, bodyText, Field.Store.NO, Field.Index.TOKENIZED));
			return doc;
		}
		return null;
	}

}
